#Author Martina Bradic
#This script performs a comparison between baseline and on-trx paires.
#First we cluster all the samples baseline and on-trx and determine their immune type; Supplemental Figure 17
#we then compare changes in immune proportion, TE expression and IKZF1 expression between pairs pre and post treatment
# this script generates Figure 6, and Table 1, as well as all associated statistics
library(data.table)
## data.table 1.15.4 using 1 threads (see ?getDTthreads). Latest news: r-datatable.com
## **********
## This installation of data.table has not detected OpenMP support. It should still work but in single-threaded mode.
## This is a Mac. Please read https://mac.r-project.org/openmp/. Please engage with Apple and ask them for support. Check r-datatable.com for updates, and our Mac instructions here: https://github.com/Rdatatable/data.table/wiki/Installation. After several years of many reports of installation problems on Mac, it's time to gingerly point out that there have been no similar problems on Windows or Linux.
## **********
library(tidyr)
library("SummarizedExperiment")
## Warning: package 'SummarizedExperiment' was built under R version 4.3.2
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
##
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
##
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
##
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.3.2
##
## Attaching package: 'S4Vectors'
##
## The following object is masked from 'package:tidyr':
##
## expand
##
## The following objects are masked from 'package:data.table':
##
## first, second
##
## The following object is masked from 'package:utils':
##
## findMatches
##
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
##
## The following object is masked from 'package:data.table':
##
## shift
##
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.3.3
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
##
## Attaching package: 'Biobase'
##
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
##
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:Biobase':
##
## combine
##
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
##
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
##
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
##
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
##
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
##
## The following object is masked from 'package:matrixStats':
##
## count
##
## The following objects are masked from 'package:data.table':
##
## between, first, last
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.18.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(ggplot2)
library(limma)
##
## Attaching package: 'limma'
##
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
library(edgeR)
## Warning: package 'edgeR' was built under R version 4.3.2
library(survival)
## Warning: package 'survival' was built under R version 4.3.3
library(survminer)
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
##
## The following object is masked from 'package:survival':
##
## myeloma
library(ggsci)
## Warning: package 'ggsci' was built under R version 4.3.3
library (factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(FactoMineR)
library (factoextra)
library(immunedeconv)
## Loading required package: EPIC
library(GSVA)
## Warning: package 'GSVA' was built under R version 4.3.3
`%nin%` = Negate(`%in%`)
################################################### GET GENE MATRIX FROM REDISCOVERTE QUANTIFICATION ######################
#Note: These are normalzied CPM log2 values which are obtained by the scaling method in edgeR,
## These counts still need to be filtered for low expression and non-variable genes, and normalized using voom method
### prior to differential expression and linear model analysis
Clinical_phenotypes_ontrx<-read.table("Clinical_phenotypes_ontrx")
gene_expression_mtrx_ontrx<-read.table("gene_expression_mtrx_ontrx")
colnames(gene_expression_mtrx_ontrx)<-gsub(".","-",fixed=TRUE,colnames(gene_expression_mtrx_ontrx))
names_ontrx<-data.frame(colnames(gene_expression_mtrx_ontrx))
colnames(names_ontrx)<-"Pathology_Accession_nb"
names_ontrx<-left_join(names_ontrx, Clinical_phenotypes_ontrx)
## Joining with `by = join_by(Pathology_Accession_nb)`
all.equal(names_ontrx$Pathology_Accession_nb,colnames(gene_expression_mtrx_ontrx))
## [1] TRUE
colnames(gene_expression_mtrx_ontrx)<-names_ontrx$CMO_ID
colnames(gene_expression_mtrx_ontrx)<-paste(colnames(gene_expression_mtrx_ontrx), sep="_","Ontrx")
gene_expression_mtrx_baseline<-read.table("gene_expression_mtrx_baseline")
colnames(gene_expression_mtrx_baseline)<-gsub(".","-",fixed=TRUE,colnames(gene_expression_mtrx_baseline))
colnames(gene_expression_mtrx_baseline)<-paste(colnames(gene_expression_mtrx_baseline), sep="_","base")
all.equal(rownames(gene_expression_mtrx_ontrx),rownames(gene_expression_mtrx_baseline))
## [1] TRUE
gene_expression_mtrx<-cbind(gene_expression_mtrx_baseline,gene_expression_mtrx_ontrx)
#get clinical data
Clinical_phenotypes<-read.table("Clinical_phenotypes_combined.txt", sep="\t", header=TRUE)
#order clinical file based on gene expresion
Ordered_pheno<-data.frame(colnames(gene_expression_mtrx))
colnames(Ordered_pheno)<-"CMO_ID"
Ordered_pheno<-left_join(Ordered_pheno,Clinical_phenotypes)
## Joining with `by = join_by(CMO_ID)`
Ordered_pheno$`Abbreviation for Figures`<-Ordered_pheno$Abbreviation.for.Figures
#perform filtering
keep.exprs <- filterByExpr(gene_expression_mtrx, group=Ordered_pheno$`Abbreviation for Figures`)
gene_expression_mtrx_filtered <- gene_expression_mtrx[which(keep.exprs==TRUE),]
dim(gene_expression_mtrx_filtered)
## [1] 29432 113
# after filtering
#29432 113
### normalize it with voom
normalized_counts<-voom(gene_expression_mtrx_filtered)
normalized_counts<-normalized_counts$E
###contring with mcp_counter mehtod
res_mcp_counter = deconvolute(normalized_counts, "mcp_counter")
##
## >>> Running mcp_counter
res_mcp_counter_table_2<-as.data.frame(res_mcp_counter %>% gather(sample, fraction, -cell_type))
res_mcp_counter_table_2_matrix<-spread(res_mcp_counter_table_2, cell_type,fraction)
res_mcp_counter_table<-dplyr::left_join(res_mcp_counter_table_2_matrix,Ordered_pheno,by=c("sample"="CMO_ID"))
rownames(res_mcp_counter_table)<-res_mcp_counter_table$sample
res_mcp_counter_table<-res_mcp_counter_table[,-1]
res_mcp_counter_table_to_plot<-res_mcp_counter_table[,1:11]
mat2<-data.frame(res_mcp_counter_table_to_plot)
####Scale the values
mat_immuno2<-t(scale(mat2))
all.equal(colnames(mat_immuno2),Ordered_pheno$CMO_ID)
## [1] "113 string mismatches"
#need to reorder
correct_order_for_batch<-data.frame(colnames(mat_immuno2))
colnames(correct_order_for_batch)<-"CMO_ID"
correct_order_for_batch<-left_join(correct_order_for_batch,Ordered_pheno)
## Joining with `by = join_by(CMO_ID)`
all.equal(colnames(mat_immuno2),correct_order_for_batch$CMO_ID)
## [1] TRUE
mat_immuno2_no_batch<-removeBatchEffect(mat_immuno2, correct_order_for_batch$batch,correct_order_for_batch$`Abbreviation for Figures`)
##################################################### DETERMINE CLUSTERS FROM MCP counter using FactoMineR analysis ###############
#set seed for reproducibilty
set.seed(2021)
#We will apply agglomerative HC using Ward's method/Euclidean distance across a range of k from 2-12; we will have FactoMineR suggest the optimal k which is does based on within-cluster sum of squares (i.e. inertia); the initial partition suggested by HC is followed by a k-means procedure to consolidate cluster membership
res.hc <- HCPC(as.data.frame(t(mat_immuno2_no_batch)),
nb.clust = -1, #Allow FactoMineR to suggest optimal partition (minimize within cluster sum of sq)
consol=TRUE, #Perform k-means consolidation after initial HC partition
iter.max = 10, #Iterations for the k-means consolidation
min = 2, #Min k
max = 12, #Max k
metric="euclidean", #Distance metric
method="ward", #Ward's method
graph = FALSE)
#Lets evaluate between group sum of squares (inertia) before and after k-means consolidation - it should go up as we are maximizing separation
res.hc$call$bw.before.consol
## [1] 2.592913
res.hc$call$bw.after.consol
## [1] 3.120352
#Now that we've performed the clustering, lets visualize the HC dendrogram using factoextra, will use the lancet color palette from ggsci package
#Note that this reflects the HC partition prior to the k-means consolidation
#Lets export the cluster assignments so that we can compare features in more detail
#write.csv(res.hc[["data.clust"]],"[specify name of output file]")
cluster_assignments<-res.hc[["data.clust"]]
# If you want to plot dendrogram
#fviz_dend(res.hc,
# cex = 0.7, # Label size
# palette = "jco", # Color palette see ?ggpubr::ggpar
# rect = TRUE, rect_fill = TRUE, # Add rectangle around groups
# rect_border = "jco", # Rectangle color
# labels_track_height = 0.8 # Augment the room for labels
#)
# Individuals facor map, This is Supplemental Figure 1
fviz_cluster(res.hc, geom = "point", main = "Factor") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
axis.title.x = element_text(size=10),axis.title.y = element_text(size=10),
axis.text.x = element_text( size=10),axis.text.y = element_text(size=10)) +
scale_color_manual(values = c("1" = "red", "2" = "#0000FF"))

#Supplemental table 2: Display quantitative variables that describe the most each cluster
res.hc$desc.var$quanti
## $`1`
## v.test Mean in category Overall mean sd in category
## Endothelial.cell -3.363652 -0.8939000 -0.65229926 0.7444703
## Neutrophil -5.978128 -0.3572731 0.10100968 0.6432925
## T.cell.CD8. -6.786158 -0.8515328 -0.28076004 0.7931594
## B.cell -6.983992 -0.6059057 -0.01852841 0.6364411
## NK.cell -7.001257 -0.8815788 -0.31519989 0.6145970
## cytotoxicity.score -7.298817 -1.0814077 -0.47907261 0.6525948
## Myeloid.dendritic.cell -7.308607 -0.8801371 -0.28172804 0.6780153
## Monocyte -7.419175 -0.5542371 -0.01419596 0.5881823
## Macrophage.Monocyte -7.419175 -0.5542371 -0.01419596 0.5881823
## T.cell -7.962981 -1.1257962 -0.49999310 0.4904141
## Overall sd p.value
## Endothelial.cell 0.7806007 7.691855e-04
## Neutrophil 0.8331246 2.257163e-09
## T.cell.CD8. 0.9140731 1.151591e-11
## B.cell 0.9140186 2.869076e-12
## NK.cell 0.8791698 2.536765e-12
## cytotoxicity.score 0.8968655 2.903093e-13
## Myeloid.dendritic.cell 0.8898263 2.699260e-13
## Monocyte 0.7910664 1.178525e-13
## Macrophage.Monocyte 0.7910664 1.178525e-13
## T.cell 0.8540900 1.679431e-15
##
## $`2`
## v.test Mean in category Overall mean sd in category
## T.cell 7.962981 0.1599447 -0.49999310 0.6297582
## Monocyte 7.419175 0.5553020 -0.01419596 0.5375842
## Macrophage.Monocyte 7.419175 0.5553020 -0.01419596 0.5375842
## Myeloid.dendritic.cell 7.308607 0.3493216 -0.28172804 0.6050962
## cytotoxicity.score 7.298817 0.1561171 -0.47907261 0.6460940
## NK.cell 7.001257 0.2820725 -0.31519989 0.7033419
## B.cell 6.983992 0.6008876 -0.01852841 0.7360507
## T.cell.CD8. 6.786158 0.3211457 -0.28076004 0.5893853
## Neutrophil 5.978128 0.5842897 0.10100968 0.7311751
## Endothelial.cell 3.363652 -0.3975203 -0.65229926 0.7355115
## Overall sd p.value
## T.cell 0.8540900 1.679431e-15
## Monocyte 0.7910664 1.178525e-13
## Macrophage.Monocyte 0.7910664 1.178525e-13
## Myeloid.dendritic.cell 0.8898263 2.699260e-13
## cytotoxicity.score 0.8968655 2.903093e-13
## NK.cell 0.8791698 2.536765e-12
## B.cell 0.9140186 2.869076e-12
## T.cell.CD8. 0.9140731 1.151591e-11
## Neutrophil 0.8331246 2.257163e-09
## Endothelial.cell 0.7806007 7.691855e-04
#show principal dimensions that are the most associated with clusters, type this:
res.hc$desc.axes$quanti
## $`1`
## v.test Mean in category Overall mean sd in category Overall sd
## Dim.1 -9.017132 -1.71767 -1.328338e-15 1.062387 2.070204
## p.value
## Dim.1 1.930773e-19
##
## $`2`
## v.test Mean in category Overall mean sd in category Overall sd
## Dim.1 9.017132 1.811362 -1.328338e-15 1.105745 2.070204
## p.value
## Dim.1 1.930773e-19
#Finally, representative individuals of each cluster can be extracted as follow:
res.hc$desc.ind$para
## Cluster: 1
## C-CPRNJ2_base C-1LM4EJ_base C-WD4RUC_base C-2M22RC_base C-6PL9CN_base
## 0.7655650 0.8278142 0.8862958 0.9670174 1.0820552
## ------------------------------------------------------------
## Cluster: 2
## C-AL8WAK_base C-CPRNJ2_Ontrx C-MA4H5N_Ontrx C-2M22RC_Ontrx C-46JU8E_Ontrx
## 0.7930102 0.8378519 0.8467655 0.9675638 1.0185951
Cluster_1_individuals_contrib<-unlist(res.hc$desc.ind$para[1])
Cluster_1_individuals_contrib<-names(Cluster_1_individuals_contrib)
Cluster_1_individuals_contrib<- gsub("1.","", Cluster_1_individuals_contrib)
Cluster_2_individuals_contrib<-unlist(res.hc$desc.ind$para[2])
Cluster_2_individuals_contrib<-names(Cluster_2_individuals_contrib)
Cluster_2_individuals_contrib<- gsub("2.","", Cluster_2_individuals_contrib)
#match orders of samples
all.equal(rownames(cluster_assignments), colnames(mat_immuno2))
## [1] TRUE
all.equal(rownames(cluster_assignments), colnames(t(mat2)))
## [1] TRUE
all.equal(rownames(cluster_assignments), colnames(mat_immuno2_no_batch))
## [1] TRUE
#call cluster names
cluster_assignments$Immune_subtypes<-ifelse(cluster_assignments$clust %in% 1, "Immune cold","Immune hot")
cluster_assignments$CMO_ID<-rownames(cluster_assignments)
#Assign cluster names into phenotyo phile
Ordered_pheno<-left_join(Ordered_pheno,cluster_assignments[,c("CMO_ID","Immune_subtypes")])
## Joining with `by = join_by(CMO_ID)`
############################################################################################################################
# GET Trasposable element counts from REDISCOVERTE, normalize
############################################################################################################################
##################################. GET Transportable element counts from REDISCOVER TE pipeline and prepare for analysis ##########################################
### get TEs expression only for LTR, LINE, SINE elements
#GEt only Intergenic baseline
INTERGENIC_TE_normalized_expression_1052_repeats_baseline<-read.table("INTERGENIC_TE_normalized_expression_1052_repeats_baseline")
colnames(INTERGENIC_TE_normalized_expression_1052_repeats_baseline)<-gsub(".","-",fixed=TRUE,colnames(INTERGENIC_TE_normalized_expression_1052_repeats_baseline))
#read intergenic TEs ontrx
INTERGENIC_TE_normalized_expression_1052_repeats_ontrx<-read.table("INTERGENIC_TE_normalized_expression_1052_repeats_ontrx")
colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx)<-gsub(".","-",fixed=TRUE,colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx))
dim(INTERGENIC_TE_normalized_expression_1052_repeats_baseline)
## [1] 1052 67
dim(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx)
## [1] 1052 46
#all 1052 are here
sharedTEs<-intersect(rownames(INTERGENIC_TE_normalized_expression_1052_repeats_baseline),rownames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx))
#MERGE THEM ALL TOGETHER
#renames sample names for baseline and ontrx first
colnames(INTERGENIC_TE_normalized_expression_1052_repeats_baseline)<-paste(colnames(INTERGENIC_TE_normalized_expression_1052_repeats_baseline), sep="_","base")
names_ontrx_TE<-data.frame(colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx))
colnames(names_ontrx_TE)<-"Pathology_Accession_nb"
names_ontrx_TE<-left_join(names_ontrx_TE, Clinical_phenotypes_ontrx)
## Joining with `by = join_by(Pathology_Accession_nb)`
all.equal(names_ontrx_TE$Pathology_Accession_nb,colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx))
## [1] TRUE
colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx)<-names_ontrx_TE$CMO_ID
colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx)<-paste(colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx), sep="_","Ontrx")
INTERGENIC_TE_normalized_expression_1052_repeats<-cbind(INTERGENIC_TE_normalized_expression_1052_repeats_baseline,INTERGENIC_TE_normalized_expression_1052_repeats_ontrx)
#Match TEs with their familes
TE_names_order<-data.frame(rownames(INTERGENIC_TE_normalized_expression_1052_repeats))
colnames(TE_names_order)<-"repName"
TE_family_annotation<-read.table("TE_family_annotation")
TE_names_order<-left_join(TE_names_order, TE_family_annotation)
## Joining with `by = join_by(repName)`
TE_names_order$repClass<-gsub("LTR?", "LTR", fixed=TRUE, TE_names_order$repClass)
TE_names_order$repClass<-gsub("SINE?", "SINE", fixed=TRUE, TE_names_order$repClass)
#### Filter gene TE matrix filterByExpr
#TEs are rows, samples are columns, use expression per group as a minimum
#check the order of samples
all.equal(colnames(INTERGENIC_TE_normalized_expression_1052_repeats), Ordered_pheno$CMO_ID)
## [1] TRUE
keep.exprs_TE <- filterByExpr(INTERGENIC_TE_normalized_expression_1052_repeats, group=Ordered_pheno$`Abbreviation for Figures`)
INTERGENIC_TE_normalized_expression_1052_repeats_filtered <- INTERGENIC_TE_normalized_expression_1052_repeats[which(keep.exprs_TE==TRUE),]
dim(INTERGENIC_TE_normalized_expression_1052_repeats_filtered)
## [1] 995 113
all.equal(colnames(INTERGENIC_TE_normalized_expression_1052_repeats_filtered), Ordered_pheno$CMO_ID)
## [1] TRUE
###There are 844 TEs that are left here to be analysed
### normalize it with voom
normalized_intergenic_TE<-voom(INTERGENIC_TE_normalized_expression_1052_repeats_filtered)
normalized_TE_counts<-normalized_intergenic_TE$E
###Continue analysis with TE normalized data
INTERGENIC_TE_normalized_expression_1052_repeats<-data.frame(t(normalized_TE_counts))
INTERGENIC_TE_normalized_expression_1052_repeats_Z_sore_transformed<-t(INTERGENIC_TE_normalized_expression_1052_repeats %>% mutate_at(colnames(INTERGENIC_TE_normalized_expression_1052_repeats), scale))
colnames(INTERGENIC_TE_normalized_expression_1052_repeats_Z_sore_transformed)<-rownames(INTERGENIC_TE_normalized_expression_1052_repeats)
#remove those with NAs, these are 3 TEs acros all the samples only after we transfor with Z-score
INTERGENIC_TE_normalized_expression_1052_repeats_Z_sore_transformed<-INTERGENIC_TE_normalized_expression_1052_repeats_Z_sore_transformed[!rowSums(is.na(INTERGENIC_TE_normalized_expression_1052_repeats_Z_sore_transformed)) > 0,]
INTERGENIC_TE_normalized_expression_1052_repeats<-t(INTERGENIC_TE_normalized_expression_1052_repeats)
#check if Prototypes order match to TE matrix order
all.equal(Ordered_pheno$CMO_ID,colnames(INTERGENIC_TE_normalized_expression_1052_repeats))
## [1] TRUE
##################### PLOT INTERGENIC NORMALIZED EXPRESSION OVER TWO CLSUTERS
INTERGENIC_TE_normalized_expression_1052_repeats_no_batch<-removeBatchEffect(INTERGENIC_TE_normalized_expression_1052_repeats_Z_sore_transformed, Ordered_pheno$batch,Ordered_pheno$`Abbreviation for Figures`)
#TE_intergenic_for_heatmap<-t(scale(INTERGENIC_TE_normalized_expression_1052_repeats))
TE_intergenic_for_heatmap<-INTERGENIC_TE_normalized_expression_1052_repeats_no_batch
#For_heatmap_TE_annotation<-readRDS("~/juno/work/ccs/badicm/Project_NACEV/RNAseq/REdiscoverTE/REdiscoverTE/ANALYSIS_FINAL/Rollup_analysis/ALL_INCLUDING_DICSON_projects/RESULTS/RE_intergenic_2_counts_normalized.RDS")
TE_intergenic_for_heatmap_row_anno<-data.frame(rownames(TE_intergenic_for_heatmap))
colnames(TE_intergenic_for_heatmap_row_anno)<-"repName"
# Fix some TE family names, replace "." with "-"
TE_intergenic_for_heatmap_row_anno$repName<-gsub("CR1.","CR1-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub(".int","-int",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("ERV3.","ERV3-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("ERVL.","ERVL-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("hAT.","hAT-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("HERV.","HERV-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("HUERS.","HUERS-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("L1PA15.","L1PA15-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("L2.","L2-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("MamGypsy2.","MamGypsy2-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("ORSL.","ORSL-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno<-left_join(TE_intergenic_for_heatmap_row_anno,TE_family_annotation)
## Joining with `by = join_by(repName)`
TE_intergenic_for_heatmap_row_anno$repClass<-gsub("LTR?","LTR",fixed = TRUE,TE_intergenic_for_heatmap_row_anno$repClass)
TE_intergenic_for_heatmap_row_anno$repClass<-gsub("SINE?","SINE",fixed = TRUE,TE_intergenic_for_heatmap_row_anno$repClass)
#recheck if it was replaced
TE_intergenic_for_heatmap_row_anno[is.na(TE_intergenic_for_heatmap_row_anno$repClas),]
## [1] repName repFamily repClass
## <0 rows> (or 0-length row.names)
###Create annotations and plot Supplemental Figure 5
#Annotation for columns
ann_for_2_types <- data.frame(Ordered_pheno[,c("Response","Abbreviation for Figures", "Sample_Timepoint")])
rownames(ann_for_2_types)<-Ordered_pheno$"CMO_ID"
colnames(ann_for_2_types) <- c("Response","Subtype","Sample_Timepoint")
colours_responders <- list(
'Response' = c("PD"="#543005","SD"="#A6611A" , "CR/PR"= "#F6E8C3"),
'Subtype'=c ("ANGS"="#DD67C0" ,"CHS"="#A848E0","DDLS"= "#DEABCA","LMS"= "#F4A460","OS"= "#8881D2" ,"SARCNOS"="#8DB6D1","UPS"= "#82D992","EHE"="#FFFF00","LPS"="#0000FF","MFS"="#B22222","Other"="#FF0000","SBRC"="#85E0D6","ASPS"="grey"),
'Sample_Timepoint'=c("Baseline"="blue",'Ontrx'="red")
)
colAnn2 <- HeatmapAnnotation(df = ann_for_2_types,
which = "col",
col = colours_responders,
annotation_width = unit(c(1, 4), "cm"),
gap = unit(1,"mm"))
#Annotation for rows
colours_rows <- list(
'repClass' = c("DNA"="orange","SINE"="darkpurple" , "LINE"= "darkblue","LTR"="darkgreen")
)
rowAnn <- HeatmapAnnotation(df = TE_intergenic_for_heatmap_row_anno$repClass,
which = "row",
col = colours_rows,
annotation_width = unit(c(1, 4), "cm"),
gap = unit(1,"mm"))
################################.####################################### CALCALATE TE scores ########################################
TE_matrix<-INTERGENIC_TE_normalized_expression_1052_repeats
TE_matrix<-t(TE_matrix)
#Check if the ORder pheno is the same sample order with TE_matrix
all.equal(rownames(TE_matrix) , Ordered_pheno$CMO_ID)
## [1] TRUE
INTERGENIC_TE_normalized_expression_1052_repeats[rownames(INTERGENIC_TE_normalized_expression_1052_repeats) %in% c("MER57F","Tigger17a","MER61F","MER45A","LTR104_Mam","HERVL74.int"),]
## C-VKXN13_base C-P61F3Y_base C-F5U9XF_base C-X96M0A_base
## HERVL74.int 7.364396 7.530934 10.514843 7.754191
## LTR104_Mam 8.556495 8.267900 9.194143 8.796011
## MER45A 8.623783 8.663129 10.534372 9.437557
## MER57F 6.187899 7.377311 13.637270 7.494547
## MER61F 6.264861 6.259129 9.965084 7.494547
## Tigger17a 8.420139 8.538202 9.635039 8.952130
## C-82E9M5_base C-5JJPJR_base C-KAJ2A6_base C-AL8WAK_base
## HERVL74.int 8.317869 9.760623 7.918643 8.584018
## LTR104_Mam 9.288722 9.921088 8.678623 9.140050
## MER45A 9.618202 9.087098 8.525193 9.403597
## MER57F 8.334263 7.831308 6.212756 7.426477
## MER61F 7.785521 6.690648 6.063378 6.836461
## Tigger17a 9.100277 9.301451 8.581227 8.884991
## C-532E5K_base C-C3AJRC_base C-VK8L7J_base C-LEC8C7_base
## HERVL74.int 9.188399 8.985157 6.378836 7.086029
## LTR104_Mam 8.919695 8.289513 8.140225 8.433300
## MER45A 9.553063 8.700301 9.124572 8.895990
## MER57F 8.540805 6.317527 6.749204 5.736444
## MER61F 7.240866 5.153140 9.492959 6.139800
## Tigger17a 8.589222 9.033251 8.583526 8.447584
## C-CPRNJ2_base C-CC1UXH_base C-MDEDT7_base C-6HEVUK_base
## HERVL74.int 7.245994 7.523186 8.884989 7.587443
## LTR104_Mam 8.373749 8.627137 8.769215 8.620074
## MER45A 8.945063 8.894024 9.073436 8.873087
## MER57F 6.191546 7.057522 6.334145 11.924772
## MER61F 7.413939 5.328170 7.646735 7.295839
## Tigger17a 9.268088 8.411159 8.435231 8.647468
## C-MA4H5N_base C-68NL7N_base C-TDR4W9_base C-70AEFR_base
## HERVL74.int 9.848327 8.605475 9.023267 8.074858
## LTR104_Mam 9.095952 8.970931 8.262938 7.926759
## MER45A 9.948246 8.909091 10.597237 8.209159
## MER57F 10.940178 5.717950 9.732150 6.176737
## MER61F 6.734764 7.168611 6.555118 5.380271
## Tigger17a 9.036655 9.141976 8.243174 8.566971
## C-M81LDF_base C-ELUPUM_base C-WT3724_base C-XPH66L_base
## HERVL74.int 9.658450 8.566307 8.373617 7.670697
## LTR104_Mam 8.221933 8.341434 9.443551 8.051278
## MER45A 9.700473 9.434000 8.373617 7.775667
## MER57F 4.800469 8.885258 6.179606 5.628010
## MER61F 6.385431 7.323946 6.264495 5.112702
## Tigger17a 8.958898 9.146303 8.054075 8.008866
## C-NHKURN_base C-U859RF_base C-7E9EXV_base C-LLWLEV_base
## HERVL74.int 6.950878 7.913118 9.447005 8.721269
## LTR104_Mam 8.159692 8.469511 8.599008 9.048192
## MER45A 8.980143 8.798348 9.794559 10.128991
## MER57F 5.663997 12.427797 12.179698 9.529814
## MER61F 3.342069 5.591190 7.798617 7.973141
## Tigger17a 7.734386 8.663599 8.689206 8.951976
## C-28PL34_base C-81D6WE_base C-YHLTV1_base C-TN74NL_base
## HERVL74.int 9.244488 8.262996 8.154943 9.849528
## LTR104_Mam 8.567118 8.886341 8.712072 8.958815
## MER45A 7.996122 9.833672 8.931785 9.821568
## MER57F 7.004437 8.705291 5.904717 11.706267
## MER61F 4.809421 7.913308 6.968847 6.378977
## Tigger17a 9.272300 8.598044 8.648225 8.863307
## C-8EEM67_base C-L7M7WP_base C-44CDAN_base C-CTK666_base
## HERVL74.int 8.343546 10.272545 8.608068 7.753411
## LTR104_Mam 8.776132 9.198710 8.701782 7.438255
## MER45A 10.001772 8.824802 8.646287 7.800825
## MER57F 10.843671 7.532135 5.630785 7.609375
## MER61F 8.891034 6.067779 6.143787 5.258931
## Tigger17a 9.072418 8.923768 8.486932 8.090332
## C-R5FT5U_base C-2EE23U_base C-E36838_base C-MVMX43_base
## HERVL74.int 8.928043 7.688649 8.691849 9.850345
## LTR104_Mam 8.498875 8.711251 8.646406 8.861720
## MER45A 9.161242 8.703558 10.405136 10.621913
## MER57F 5.597654 6.163188 8.523831 7.117181
## MER61F 7.238111 6.328247 9.708528 9.244293
## Tigger17a 9.057085 7.913209 9.029350 9.323210
## C-556AL5_base C-L7C8JC_base C-6RJP70_base C-NF6VH4_base
## HERVL74.int 8.641790 8.229167 8.251716 8.537329
## LTR104_Mam 8.746759 8.637863 8.370036 8.258514
## MER45A 9.699594 9.092086 8.010708 9.099986
## MER57F 5.416370 7.115096 6.935940 6.865281
## MER61F 7.804640 7.525560 5.904913 5.994014
## Tigger17a 9.345715 9.362062 8.458167 8.707924
## C-WJ49UR_base C-20FDCU_base C-4DV3D5_base C-FN268L_base
## HERVL74.int 9.076749 8.286500 8.306275 9.312354
## LTR104_Mam 8.889730 8.317197 8.427412 8.617824
## MER45A 10.255901 9.023466 9.479600 9.513052
## MER57F 8.277047 7.724024 5.852099 6.416190
## MER61F 9.059101 7.937166 6.162440 6.990426
## Tigger17a 9.059101 8.541757 8.556971 8.603817
## C-K9U229_base C-H7RF5A_base C-20LTFP_base C-WD4RUC_base
## HERVL74.int 9.043031 9.090380 9.026126 7.062021
## LTR104_Mam 8.778654 8.833524 9.056028 8.523248
## MER45A 9.277832 8.609898 9.583124 9.555191
## MER57F 5.811399 7.529974 7.924431 5.730177
## MER61F 6.909548 6.511596 7.638522 5.264513
## Tigger17a 9.015938 8.303009 8.724822 9.234140
## C-001528_base C-46JU8E_base C-WKND2T_base C-4PEVTX_base
## HERVL74.int 8.972761 10.690109 9.295882 9.227733
## LTR104_Mam 9.019810 9.262909 8.853432 8.938226
## MER45A 8.821644 10.108349 8.772157 9.003077
## MER57F 5.865233 9.155552 6.649046 7.965603
## MER61F 7.027504 8.510534 6.472890 7.965603
## Tigger17a 8.839238 9.308174 9.054726 9.162986
## C-1LM4EJ_base C-Y6TH4J_base C-JNLDDC_base C-1PCN42_base
## HERVL74.int 8.024477 10.400838 8.960997 9.908911
## LTR104_Mam 8.747158 8.494640 8.769268 9.325686
## MER45A 9.083655 9.737643 9.427987 9.642396
## MER57F 6.192031 10.297540 5.539533 9.770365
## MER61F 5.313337 7.750738 6.928576 8.330885
## Tigger17a 8.441290 9.702128 8.158443 8.819274
## C-WNVW3H_base C-70K997_base C-X867R9_base C-J0AW9Y_base
## HERVL74.int 9.115802 8.838609 9.765819 8.289175
## LTR104_Mam 8.712918 8.068091 8.517284 9.074235
## MER45A 9.973530 8.846242 9.716406 8.996495
## MER57F 7.602274 7.175006 7.616650 7.050388
## MER61F 6.779152 5.952614 6.680783 5.775766
## Tigger17a 9.130374 8.898574 8.789904 7.787354
## C-6PL9CN_base C-2M22RC_base C-2FPL8C_base C-C3AJRC_Ontrx
## HERVL74.int 8.330435 7.183097 8.519935 8.928316
## LTR104_Mam 9.151210 7.414423 8.354198 8.165478
## MER45A 9.056573 10.281587 9.514332 9.088639
## MER57F 7.202730 12.351279 11.234926 9.069573
## MER61F 5.676430 6.746998 5.209147 5.807178
## Tigger17a 8.247179 7.868989 8.427571 8.878298
## C-532E5K_Ontrx C-AL8WAK_Ontrx C-LEC8C7_Ontrx C-CPRNJ2_Ontrx
## HERVL74.int 9.959216 9.251875 6.856488 7.971688
## LTR104_Mam 9.248464 9.273775 8.359863 8.400855
## MER45A 10.135112 9.461999 8.974252 9.054465
## MER57F 10.400236 9.088486 5.811164 7.803182
## MER61F 7.537549 7.116025 5.614128 7.582916
## Tigger17a 8.962047 9.012370 8.974252 9.197322
## C-F5U9XF_Ontrx C-WT3724_Ontrx C-XPH66L_Ontrx C-NHKURN_Ontrx
## HERVL74.int 8.280177 8.419967 7.170519 7.213268
## LTR104_Mam 9.336162 9.220188 8.858575 8.092586
## MER45A 9.211085 8.804826 8.336878 8.725167
## MER57F 11.829121 7.275093 5.926594 5.827614
## MER61F 8.056054 5.983327 4.848591 5.396980
## Tigger17a 9.316054 8.401470 8.473082 8.073090
## C-LLWLEV_Ontrx C-7E9EXV_Ontrx C-MA4H5N_Ontrx C-6HEVUK_Ontrx
## HERVL74.int 9.994478 8.735906 8.694976 8.305679
## LTR104_Mam 9.162703 8.796874 8.561298 8.787387
## MER45A 10.237837 9.831284 9.326008 10.742484
## MER57F 13.041496 8.983287 8.103616 10.468365
## MER61F 8.989371 6.826418 6.919191 5.759190
## Tigger17a 9.442277 8.693781 9.358613 8.406080
## C-28PL34_Ontrx C-TDR4W9_Ontrx C-TN74NL_Ontrx C-70AEFR_Ontrx
## HERVL74.int 9.428621 8.956672 10.263260 8.399700
## LTR104_Mam 8.416166 8.216127 8.697281 8.268455
## MER45A 8.465354 10.663389 9.743871 10.001293
## MER57F 6.912725 7.599455 11.800740 10.762146
## MER61F 4.888429 6.377063 4.247248 6.077771
## Tigger17a 9.031387 8.045081 8.939739 8.565437
## C-U859RF_Ontrx C-81D6WE_Ontrx C-YHLTV1_Ontrx C-8EEM67_Ontrx
## HERVL74.int 8.781301 8.833011 9.106663 7.467569
## LTR104_Mam 8.563496 9.079706 9.142620 8.888812
## MER45A 9.681437 10.364948 9.237116 8.937515
## MER57F 13.741983 10.940229 6.290177 6.472280
## MER61F 6.696134 9.054505 5.927607 6.069020
## Tigger17a 9.152016 9.002744 8.746691 8.425505
## C-M81LDF_Ontrx C-ELUPUM_Ontrx C-44CDAN_Ontrx C-L7M7WP_Ontrx
## HERVL74.int 7.633925 8.696502 8.501446 8.902762
## LTR104_Mam 9.062768 8.569868 8.772216 8.146528
## MER45A 8.649272 10.016003 8.847373 8.604284
## MER57F 6.969109 10.832351 4.684753 5.893113
## MER61F 5.311997 6.755880 6.953242 7.228990
## Tigger17a 9.012436 9.301530 8.344262 8.600439
## C-R5FT5U_Ontrx C-556AL5_Ontrx C-WJ49UR_Ontrx C-L7C8JC_Ontrx
## HERVL74.int 9.261193 8.812480 8.592005 8.912503
## LTR104_Mam 8.820118 8.620999 8.719922 8.721981
## MER45A 8.982593 9.479301 8.095067 9.472461
## MER57F 6.336601 7.275612 7.135199 7.927300
## MER61F 7.690238 8.356102 4.523525 8.566831
## Tigger17a 8.912630 9.539631 8.319384 9.823025
## C-WKND2T_Ontrx C-NF6VH4_Ontrx C-H7RF5A_Ontrx C-4DV3D5_Ontrx
## HERVL74.int 8.923838 7.921987 7.419163 9.340892
## LTR104_Mam 8.663549 8.454753 8.829795 8.851931
## MER45A 8.932969 8.382084 9.592992 9.765822
## MER57F 6.690719 6.277215 9.823162 10.399256
## MER61F 7.843799 7.014180 6.309538 7.254531
## Tigger17a 8.923838 8.316743 8.829795 8.638797
## C-MVMX43_Ontrx C-FN268L_Ontrx C-20LTFP_Ontrx C-001528_Ontrx
## HERVL74.int 9.093408 9.214025 8.923118 8.815503
## LTR104_Mam 7.965067 8.381557 8.766528 8.984322
## MER45A 9.142929 9.593273 8.987607 8.733626
## MER57F 5.683497 5.192896 7.846946 5.723056
## MER61F 6.317369 6.833353 7.811757 5.095025
## Tigger17a 8.504996 8.866984 9.220100 8.669016
## C-46JU8E_Ontrx C-6RJP70_Ontrx C-WD4RUC_Ontrx C-1LM4EJ_Ontrx
## HERVL74.int 9.208241 10.234411 8.109089 8.737623
## LTR104_Mam 9.454092 8.728116 9.522810 8.760987
## MER45A 10.894665 8.560880 9.364204 9.223050
## MER57F 10.572124 8.065749 4.917948 7.766769
## MER61F 9.399900 5.324992 7.200882 6.010438
## Tigger17a 9.176357 8.371685 9.093206 8.597843
## C-20FDCU_Ontrx C-WNVW3H_Ontrx C-J0AW9Y_Ontrx C-6PL9CN_Ontrx
## HERVL74.int 9.122361 9.242671 8.109465 8.736709
## LTR104_Mam 8.678491 8.623945 9.063395 9.466488
## MER45A 10.316241 9.079788 9.254444 8.585702
## MER57F 8.782202 6.935281 6.682800 8.005705
## MER61F 8.847420 7.298069 6.724343 6.359989
## Tigger17a 8.678491 8.623945 8.280306 8.952687
## C-2M22RC_Ontrx
## HERVL74.int 8.264716
## LTR104_Mam 8.199127
## MER45A 10.380193
## MER57F 9.784090
## MER61F 6.860325
## Tigger17a 8.285932
#calculate TE score for significant Te features
TE_features<-c("Tigger17a","MER61F","MER45A","LTR104_Mam","HERVL74.int")
#geneSets<-list()
#geneSets$TE_features<-as.character(TE_features)
#geneSets<-lapply(geneSets, function(z){ z[!is.na(z) & z != ""]})
TE_genes_matrix<-as.matrix(TE_matrix[,colnames(TE_matrix) %in% TE_features])
TE_genes_matrix_df<-data.frame(TE_genes_matrix)
TE_genes_matrix_df$CMO_ID<-rownames(TE_genes_matrix_df)
TE_genes_matrix_df<-left_join(TE_genes_matrix_df, Ordered_pheno)
## Joining with `by = join_by(CMO_ID)`
TE_features<-data.frame(TE_features)
#calculate TE scores
geneSets<-as.list(TE_features)
geneSets$TE_features<-as.character(geneSets$TE_features)
geneSets<-lapply(geneSets, function(z){ z[!is.na(z) & z != ""]})
TE_genes_matrix<-as.matrix(TE_matrix[,colnames(TE_matrix) %in% as.character(TE_features$TE_features)])
gsva_for_TE__genes <-gsva(t(TE_genes_matrix),geneSets,method="zscore", kcdf="Gaussian",verbose=FALSE)
## Warning: Calling gsva(expr=., gset.idx.list=., method=., ...) is deprecated; use
## a method-specific parameter object (see '?gsva').
#check for same order
all.equal(colnames(gsva_for_TE__genes),Ordered_pheno$CMO_ID)
## [1] TRUE
TE_genes_matrix_df<- data.frame(t(gsva_for_TE__genes), Ordered_pheno)
############################################## Function to plot regression. ##############################################
ggplotRegression <- function (fit) {
require(ggplot2)
ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
labs(title = paste("Adj R2 = ",signif(summary(fit)$adj.r.squared, 5),
"Intercept =",signif(fit$coef[[1]],5 ),
" Slope =",signif(fit$coef[[2]], 5),
" P =",signif(summary(fit)$coef[2,4], 5)))
}
################################################. COMPARISON BETWEEN PAIRS for immune cells, TEs, TLS and IKZF1 #########################################################
#load function to run this analysis
source("Analyze_individual_groups_function.R")
###Run only one comparison at the the time, and continue to "RUN PAIRS COMPARISON" section and run till the end
##COMPARISON 1
#get pairs that go from cold to hot
Cold_to_hot_switch<-fread("Cold_to_hot_switch.txt",sep="\t", header=TRUE)
get_individual_pairs_comparison_calculations(Cold_to_hot_switch)
## Joining with `by = join_by(CMO_ID)`
## [[1]]

##
## [[2]]
## p_values
## B.cell 5.439675e-04
## Cancer.associated.fibroblast 3.197840e-01
## cytotoxicity.score 2.034926e-04
## Endothelial.cell 1.822876e-01
## Macrophage.Monocyte 9.273695e-04
## Monocyte 9.273695e-04
## Myeloid.dendritic.cell 1.564007e-02
## Neutrophil 6.199749e-03
## NK.cell 1.382360e-04
## T.cell 2.074388e-06
## T.cell.CD8. 6.321987e-05
##
## [[3]]

##
## [[4]]
##
## Paired t-test
##
## data: IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Baseline", ]$IKZF1 and IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Ontrx", ]$IKZF1
## t = -8.249, df = 9, p-value = 1.731e-05
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.790046 -1.019560
## sample estimates:
## mean difference
## -1.404803
##
##
## [[5]]
##
## Paired t-test
##
## data: TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Baseline", ]$TE_features and TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Ontrx", ]$TE_features
## t = -4.3051, df = 9, p-value = 0.001976
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.9246235 -0.5987162
## sample estimates:
## mean difference
## -1.26167
#######################
##COMPARISON 2
#get pairs that go from hot to cold
Hot_to_cold_switch<-fread("Hot_to_cold_switch.txt",sep="\t", header=TRUE)
get_individual_pairs_comparison_calculations(Hot_to_cold_switch)
## Joining with `by = join_by(CMO_ID)`
## [[1]]

##
## [[2]]
## p_values
## B.cell 0.032370699
## Cancer.associated.fibroblast 0.702396095
## cytotoxicity.score 0.114541456
## Endothelial.cell 0.130527610
## Macrophage.Monocyte 0.006439886
## Monocyte 0.006439886
## Myeloid.dendritic.cell 0.006430051
## Neutrophil 0.010477877
## NK.cell 0.113927138
## T.cell 0.015542421
## T.cell.CD8. 0.370287787
##
## [[3]]

##
## [[4]]
##
## Paired t-test
##
## data: IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Baseline", ]$IKZF1 and IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Ontrx", ]$IKZF1
## t = 3.5089, df = 6, p-value = 0.01269
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.3914439 2.1953183
## sample estimates:
## mean difference
## 1.293381
##
##
## [[5]]
##
## Paired t-test
##
## data: TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Baseline", ]$TE_features and TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Ontrx", ]$TE_features
## t = 2.3192, df = 6, p-value = 0.05952
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.09608835 3.58526666
## sample estimates:
## mean difference
## 1.744589
#######################
##COMPARISON 3
#get pairs that go from hot to cold
NO_switch_samples<-fread("NO_switch_samples.txt",sep="\t", header=TRUE)
NO_switch_samples_cold<-NO_switch_samples[NO_switch_samples$`After treatment` %in% "Immune cold",]
get_individual_pairs_comparison_calculations(NO_switch_samples_cold)
## Joining with `by = join_by(CMO_ID)`
## [[1]]

##
## [[2]]
## p_values
## B.cell 0.167346206
## Cancer.associated.fibroblast 0.031611882
## cytotoxicity.score 0.004164780
## Endothelial.cell 0.228541727
## Macrophage.Monocyte 0.854317437
## Monocyte 0.854317437
## Myeloid.dendritic.cell 0.621754784
## Neutrophil 0.836711124
## NK.cell 0.026767226
## T.cell 0.006421621
## T.cell.CD8. 0.005220749
##
## [[3]]

##
## [[4]]
##
## Paired t-test
##
## data: IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Baseline", ]$IKZF1 and IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Ontrx", ]$IKZF1
## t = -1.2829, df = 14, p-value = 0.2204
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.0121270 0.2545154
## sample estimates:
## mean difference
## -0.3788058
##
##
## [[5]]
##
## Paired t-test
##
## data: TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Baseline", ]$TE_features and TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Ontrx", ]$TE_features
## t = -0.9825, df = 14, p-value = 0.3425
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.3481609 0.5010579
## sample estimates:
## mean difference
## -0.4235515
#######################
##COMPARISON 4
NO_switch_samples_hot<-NO_switch_samples[NO_switch_samples$`After treatment` %in% "Immune hot",]
get_individual_pairs_comparison_calculations(NO_switch_samples_hot)
## Joining with `by = join_by(CMO_ID)`
## [[1]]

##
## [[2]]
## p_values
## B.cell 0.93027100
## Cancer.associated.fibroblast 0.73652654
## cytotoxicity.score 0.32324836
## Endothelial.cell 0.75028046
## Macrophage.Monocyte 0.27660093
## Monocyte 0.27660093
## Myeloid.dendritic.cell 0.72994378
## Neutrophil 0.52900694
## NK.cell 0.79685303
## T.cell 0.23772948
## T.cell.CD8. 0.02111259
##
## [[3]]

##
## [[4]]
##
## Paired t-test
##
## data: IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Baseline", ]$IKZF1 and IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Ontrx", ]$IKZF1
## t = -1.3059, df = 13, p-value = 0.2142
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.2857205 0.3169492
## sample estimates:
## mean difference
## -0.4843857
##
##
## [[5]]
##
## Paired t-test
##
## data: TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Baseline", ]$TE_features and TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Ontrx", ]$TE_features
## t = -0.25955, df = 13, p-value = 0.7993
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.0761521 0.8453033
## sample estimates:
## mean difference
## -0.1154244
################################################. COMPARISON BETWEEN PAIRS for 24 immune pathways #########################################################
source("Analyze_individual_groups_for_24_pathways.R")
Gene_lists_for_ddGSEA<-fread("Gene_lists_for_ddGSEA.txt",header=TRUE)
C_GAS_gene_list_KEGG<-fread("C_GAS_gene_list_KEGG")
C_GAS_gene_list_KEGG<-C_GAS_gene_list_KEGG[-c(1),]
C_GAS_gene_list_KEGG<-data.frame(C_GAS_gene_list_KEGG)
colnames(C_GAS_gene_list_KEGG)<-"genes"
Gene_lists_for_ddGSEA<-Gene_lists_for_ddGSEA %>%
mutate(Genes=strsplit(Genes, ",")) %>%
unnest(Genes)
Gene_list<-split.data.frame(Gene_lists_for_ddGSEA[-1],Gene_lists_for_ddGSEA$`Gene Signature`)
geneSets<-lapply(Gene_list,"[[","Genes")
#add c-gas
geneSets$C_GAS_gene<-C_GAS_gene_list_KEGG$genes
##NOW calculate ssGSES for each sample using normalized counts
gsva_24_gene_lists<-gsva(as.matrix(normalized_counts),geneSets,method="gsva",kcdf="Gaussian" ,verbose=FALSE)
## Warning: Calling gsva(expr=., gset.idx.list=., method=., ...) is deprecated; use
## a method-specific parameter object (see '?gsva').
## Warning in .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
## : Some gene sets have size one. Consider setting 'min.sz > 1'.
all.equal(colnames(gsva_24_gene_lists),Ordered_pheno$CMO_ID)
## [1] TRUE
gsva_24_gene_lists_df<-data.frame(t(gsva_24_gene_lists))
gsva_24_gene_lists_df$CMO_ID<-rownames(gsva_24_gene_lists_df)
get_individual_pairs_comparison_calculations_24_pathways(Cold_to_hot_switch)
## Joining with `by = join_by(CMO_ID)`
## [[1]]
## p_values
## Angiogenesis 0.2787094709
## Antigen.processing.machinery 0.1257218073
## CD8.T.effector 0.0029980270
## Cell.cycle 0.8730234361
## Cell.cycle.regulators 0.0582443271
## DNA.damage.repair 0.6081471399
## DNA.replication 0.8473638822
## DNA.replication.dependent.histones 0.8014711836
## EMT 0.6553340698
## Fanconi.anemia 0.7350825508
## FGFR3.related.genees 0.8935230004
## Homologous.recombination 0.9322775679
## IL1beta.Response 0.8613585932
## Immune.checkpoint 0.1558772903
## Mismatch.repair 0.3425272455
## NFkB.response 0.0114579655
## NHEJ 0.7521802003
## Nucleotide.excision.repair 0.6682228026
## P53.signalling.pathway 0.4273972431
## Pan.F.TBRS 0.3208205237
## TNFalpha.Response 0.1953675489
## Type.I.IFN.Response 0.4343042993
## Type.II.IFN.Response 0.0004479311
## WNT.target 0.8240490397
## C_GAS_gene 0.0085568124
##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

get_individual_pairs_comparison_calculations_24_pathways(Hot_to_cold_switch)
## Joining with `by = join_by(CMO_ID)`
## [[1]]
## p_values
## Angiogenesis 0.58359440
## Antigen.processing.machinery 0.17329909
## CD8.T.effector 0.44427407
## Cell.cycle 0.29453898
## Cell.cycle.regulators 0.58839115
## DNA.damage.repair 0.25848764
## DNA.replication 0.52312171
## DNA.replication.dependent.histones 0.26997071
## EMT 0.06792738
## Fanconi.anemia 0.58211218
## FGFR3.related.genees 0.70112885
## Homologous.recombination 0.47624561
## IL1beta.Response 0.17564642
## Immune.checkpoint 0.35974875
## Mismatch.repair 0.23585416
## NFkB.response 0.46604582
## NHEJ 0.85870214
## Nucleotide.excision.repair 0.86471630
## P53.signalling.pathway 0.24932214
## Pan.F.TBRS 0.69794817
## TNFalpha.Response 0.22191441
## Type.I.IFN.Response 0.34362391
## Type.II.IFN.Response 0.31796466
## WNT.target 0.52749729
## C_GAS_gene 0.21775491
##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

get_individual_pairs_comparison_calculations_24_pathways(NO_switch_samples_cold)
## Joining with `by = join_by(CMO_ID)`
## [[1]]
## p_values
## Angiogenesis 0.83112116
## Antigen.processing.machinery 0.59720144
## CD8.T.effector 0.11815859
## Cell.cycle 0.78678211
## Cell.cycle.regulators 0.96355426
## DNA.damage.repair 0.70617025
## DNA.replication 0.69916442
## DNA.replication.dependent.histones 0.89053054
## EMT 0.58617211
## Fanconi.anemia 0.73847954
## FGFR3.related.genees 0.89407446
## Homologous.recombination 0.87668266
## IL1beta.Response 0.75004841
## Immune.checkpoint 0.06723543
## Mismatch.repair 0.93444966
## NFkB.response 0.48053525
## NHEJ 0.54077695
## Nucleotide.excision.repair 0.90088477
## P53.signalling.pathway 0.97605296
## Pan.F.TBRS 0.87135097
## TNFalpha.Response 0.88834949
## Type.I.IFN.Response 0.71075439
## Type.II.IFN.Response 0.05727897
## WNT.target 0.78709913
## C_GAS_gene 0.87069385
##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

get_individual_pairs_comparison_calculations_24_pathways(NO_switch_samples_hot)
## Joining with `by = join_by(CMO_ID)`
## [[1]]
## p_values
## Angiogenesis 0.63171267
## Antigen.processing.machinery 0.31091901
## CD8.T.effector 0.02426114
## Cell.cycle 0.35234336
## Cell.cycle.regulators 0.51290785
## DNA.damage.repair 0.64660984
## DNA.replication 0.37184510
## DNA.replication.dependent.histones 0.23184674
## EMT 0.21094083
## Fanconi.anemia 0.62550335
## FGFR3.related.genees 0.30859168
## Homologous.recombination 0.47838641
## IL1beta.Response 0.91878861
## Immune.checkpoint 0.79619146
## Mismatch.repair 0.25251872
## NFkB.response 0.55756185
## NHEJ 0.37410148
## Nucleotide.excision.repair 0.74787174
## P53.signalling.pathway 0.47291749
## Pan.F.TBRS 0.84605335
## TNFalpha.Response 0.42486164
## Type.I.IFN.Response 0.62653510
## Type.II.IFN.Response 0.07651948
## WNT.target 0.32044491
## C_GAS_gene 0.53823990
##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

#rmarkdown::render("Script_baseline_ontrx.R")